// Aligned variant: matrix rows are allocated with _aligned_malloc (16-byte alignment).
#include<iostream>
#include<fstream>
#include<string>
#include<vector>
#include<sstream>
#include<math.h>
#include<Windows.h>
#include<immintrin.h>
using namespace std;

// Parses a whitespace-separated list of integers from `str` and, for each
// value `v`, sets arr[row][l - v - 1] to 1 (so value 0 maps to the last cell
// of the row and value l-1 maps to the first cell).
//
// str : one text line holding the integer values.
// row : index of the row of `arr` to update.
// l   : number of cells in that row.
// arr : matrix whose row `row` is updated in place.
//
// Tokens that are not integers, and values outside [0, l), are skipped so
// that malformed input can never write outside the row buffer.
void string_to_num(string str, int row, int l, int** arr) {
	stringstream ss(str);
	string token;
	while (ss >> token) {
		stringstream ts(token);
		int value;
		if (!(ts >> value))
			continue;  // not a number (or out of int range): ignore the token
		if (value < 0 || value >= l)
			continue;  // would index outside the row
		arr[row][l - value - 1] = 1;
	}
}

// Scans arr[0..size) from the front for the first cell equal to 1.
// Returns size - 1 - index for that cell, or -1 when no cell equals 1
// (including when size is not positive).
int get_first_1(int* arr, int size) {
	int pos = 0;
	while (pos < size && arr[pos] != 1)
		++pos;
	return (pos < size) ? (size - 1 - pos) : -1;
}

// Finds the first row of E whose get_first_1() value equals that of Ed.
//
// E    : matrix with `row` rows of `line` cells each.
// Ed   : single row of `line` cells to match against.
// Returns the index of the first matching row of E, or -1 if none matches.
//
// Note: when Ed contains no 1, its get_first_1() value is -1, so a row of E
// that also contains no 1 counts as a match.
int _exist(int** E, int* Ed, int row, int line) {
	if (row <= 0)
		return -1;  // nothing to search; Ed is not inspected in this case
	// Ed does not change during the search, so evaluate it once instead of
	// rescanning it for every row of E.
	const int target = get_first_1(Ed, line);
	for (int i = 0; i < row; i++) {
		if (get_first_1(E[i], line) == target)
			return i;
	}
	return -1;
}

void special_Gauss_SSE(int** E, int** Ed, int row, int rowd, int line) {
	int count = row - rowd;
	long long head, tail, freq;
	double sum_time = 0.0;
	for (int i = 0; i < rowd; i++) {
		while (get_first_1(Ed[i], line) != -1) {
			int exist_or_not = _exist(E, Ed[i], row, line);
			if (exist_or_not != -1) {
				QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
				QueryPerformanceCounter((LARGE_INTEGER*)&head);
				int k;
				for (k = 0; k + 4 <= line; k += 4) {
					//Ed[i][k] = Ed[i][k] ^ E[exist_or_not][k];					
					__m128i t1 = _mm_loadu_si128((__m128i*)(Ed[i] + k));
					__m128i t2 = _mm_loadu_si128((__m128i*)(E[exist_or_not] + k));
					t1 = _mm_xor_si128(t1, t2);
					_mm_storeu_si128((__m128i*)(Ed[i] + k), t1);
				}
				for (; k < line; k++) {
					Ed[i][k] = Ed[i][k] ^ E[exist_or_not][k];
				}
				QueryPerformanceCounter((LARGE_INTEGER*)&tail);
				sum_time += (tail - head) * 1000.0 / freq;
			}
			else {
				for (int k = 0; k < line; k++) {
					E[count][k] = Ed[i][k];
				}
				count++;
				break;
			}
		}
	}
	cout << "The SSE takes:" << sum_time << "ms" << endl;
}

// Reads every line of the text file at `path` and appends it to `lines`.
// Returns false when the file cannot be opened.
static bool read_all_lines(const char* path, vector<string>& lines) {
    ifstream in(path, ios::in);
    if (!in.is_open())
        return false;
    string text;
    while (getline(in, text))
        lines.push_back(text);
    return true;
}

// Releases a matrix created by alloc_zero_matrix. Accepts a null matrix and
// null row pointers.
static void free_matrix(int** m, int rows) {
    if (!m)
        return;
    for (int i = 0; i < rows; i++) {
        if (m[i])
            _aligned_free(m[i]);
    }
    _aligned_free(m);
}

// Allocates a rows x cols matrix of ints with 16-byte aligned storage and
// fills it with zeros. Returns a null pointer if any allocation fails.
static int** alloc_zero_matrix(int rows, int cols) {
    // Request at least one slot so that a zero-row matrix does not depend on
    // how the allocator treats a zero-byte request.
    const size_t slots = rows > 0 ? static_cast<size_t>(rows) : 1;
    int** m = static_cast<int**>(_aligned_malloc(slots * sizeof(int*), 16));
    if (!m)
        return nullptr;
    for (int i = 0; i < rows; i++)
        m[i] = nullptr;
    for (int i = 0; i < rows; i++) {
        m[i] = static_cast<int*>(_aligned_malloc(static_cast<size_t>(cols) * sizeof(int), 16));
        if (!m[i]) {
            free_matrix(m, rows);
            return nullptr;
        }
        for (int j = 0; j < cols; j++)
            m[i][j] = 0;
    }
    return m;
}

// Program entry point.
// Reads the matrix dimensions from readme.txt, loads the eliminator rows and
// the rows to be eliminated from their text files, runs special_Gauss_SSE,
// and writes every row of the eliminator matrix to the result file.
// Returns 0 on success and 1 if an input is missing or invalid, an
// allocation fails, or the result file cannot be opened.
int main() {
    int row = 0, line = 0;  // eliminator row count and column count
    int rowd = 0;           // number of rows to be eliminated

    // Dimensions file: column count, eliminator row count, eliminated row count.
    ifstream data("C:\\Users\\lenovo\\Desktop\\Gause1\\readme.txt", ios::in);
    if (!data.is_open()) {
        cout << "行列数据文件打开失败" << endl;
        return 1;
    }
    if (!(data >> line >> row >> rowd) || line <= 0 || row < 0 || rowd < 0) {
        cout << "行列数据无效" << endl;
        return 1;
    }
    data.close();
    const int lined = line;     // rows to be eliminated share the column count
    const int elim_rows = row;  // eliminator rows actually present in the input
    row += rowd;                // reserve room in E for promoted rows

    // Load both inputs before allocating anything, so a missing file cannot
    // leak the matrices.
    vector<string> elte;
    if (!read_all_lines("C:\\Users\\lenovo\\Desktop\\Gause1\\消元子.txt", elte)) {
        cout << "消元子文件打开失败" << endl;
        return 1;
    }
    vector<string> elted;
    if (!read_all_lines("C:\\Users\\lenovo\\Desktop\\Gause1\\被消元行.txt", elted)) {
        cout << "被消元行文件打开失败" << endl;
        return 1;
    }

    int** E = alloc_zero_matrix(row, line);     // eliminator matrix
    int** Ed = alloc_zero_matrix(rowd, lined);  // matrix of rows to be eliminated
    if (!E || !Ed) {
        cout << "内存分配失败" << endl;
        free_matrix(E, row);
        free_matrix(Ed, rowd);
        return 1;
    }

    // Extra lines beyond the declared row counts are ignored so they cannot
    // overwrite the reserved slots of E or run past the end of either matrix.
    for (int i = 0; i < elim_rows && static_cast<size_t>(i) < elte.size(); i++)
        string_to_num(elte[i], i, line, E);
    for (int i = 0; i < rowd && static_cast<size_t>(i) < elted.size(); i++)
        string_to_num(elted[i], i, lined, Ed);

    special_Gauss_SSE(E, Ed, row, rowd, line);  // run the SSE special Gaussian elimination

    int status = 0;
    ofstream result("C:\\Users\\lenovo\\Desktop\\Gause1\\消元结果.txt", ios::out);
    if (!result.is_open()) {
        cout << "结果文件打开失败" << endl;
        status = 1;
    }
    else {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < line; j++)
                result << E[i][j];
            result << '\n';  // no per-row flush; the stream flushes on close
        }
    }

    // Release memory.
    free_matrix(E, row);
    free_matrix(Ed, rowd);
    return status;
}
/*#include<iostream>
#include<fstream>
#include<string>
#include<vector>
#include<sstream>
#include<math.h>
#include<Windows.h>
#include<immintrin.h>
using namespace std;

void string_to_num(string str, int row, int l, int** arr) {
	string s;
	int a;
	stringstream ss(str);
	while (ss >> s) {
		stringstream ts;
		ts << s;
		ts >> a;
		arr[row][l - a - 1] = 1;
	}
}

int get_first_1(int* arr, int size) {
	for (int i = 0; i < size; i++) {
		if (arr[i] == 1)
			return size - 1 - i;
		else
			continue;
	}
	return -1;
}

int _exist(int** E, int* Ed, int row, int line) {
	for (int i = 0; i < row; i++) {
		if (get_first_1(E[i], line) == get_first_1(Ed, line))
			return i;
	}
	return -1;
}

void special_Gauss_SSE(int** E, int** Ed, int row, int rowd, int line) {
	int count = row - rowd;
	long long head, tail, freq;
	double sum_time = 0.0;
	for (int i = 0; i < rowd; i++) {
		while (get_first_1(Ed[i], line) != -1) {
			int exist_or_not = _exist(E, Ed[i], row, line);
			if (exist_or_not != -1) {
				QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
				QueryPerformanceCounter((LARGE_INTEGER*)&head);
				int k;
				for (k = 0; k + 4 <= line; k += 4) {
					//Ed[i][k] = Ed[i][k] ^ E[exist_or_not][k];					
					__m128i t1 = _mm_loadu_si128((__m128i*)(Ed[i] + k));
					__m128i t2 = _mm_loadu_si128((__m128i*)(E[exist_or_not] + k));
					t1 = _mm_xor_si128(t1, t2);
					_mm_storeu_si128((__m128i*)(Ed[i] + k), t1);
				}
				for (; k < line; k++) {
					Ed[i][k] = Ed[i][k] ^ E[exist_or_not][k];
				}
				QueryPerformanceCounter((LARGE_INTEGER*)&tail);
				sum_time += (tail - head) * 1000.0 / freq;
			}
			else {
				for (int k = 0; k < line; k++) {
					E[count][k] = Ed[i][k];
				}
				count++;
				break;
			}
		}
	}
	cout << "The SSE takes:" << sum_time << "ms" << endl;
}

int main() {
    ifstream eliminate;//消元子
    ifstream eliminated;//被消元子
    ifstream data;//行列数据
    ofstream result;//结果
    int row, line;//消元子的行与列数
    int rowd, lined;//被消元子的行与列数
    data.open("C:\\Users\\lenovo\\Desktop\\Gause4\\readme.txt", ios::in);
    data >> line;
    data >> row;
    data >> rowd;
    lined = line;
    row += rowd;
    int** E = new int* [row];
    for (int i = 0; i < row; i++)
        E[i] = new int[line];
    for (int i = 0; i < row; i++) {
        for (int j = 0; j < line; j++) {
            E[i][j] = 0;
        }
    }//消元矩阵初始化
    int** Ed = new int* [rowd];
    for (int i = 0; i < rowd; i++)
        Ed[i] = new int[lined];
    for (int i = 0; i < rowd; i++) {
        for (int j = 0; j < lined; j++) {
            Ed[i][j] = 0;//被消元矩阵初始化
        }
    }

    eliminate.open("C:\\Users\\lenovo\\Desktop\\Gause4\\消元子.txt", ios::in);
    if (!eliminate.is_open()) {
        cout << "消元子文件打开失败" << endl;
        return 1;
    }
    vector<string> elte;
    string temp1;
    while (getline(eliminate, temp1))
        elte.push_back(temp1);
    eliminate.close();
    for (int i = 0; i < elte.size(); i++)
        string_to_num(elte[i], i, line, E);
    eliminated.open("C:\\Users\\lenovo\\Desktop\\Gause4\\被消元行.txt", ios::in);
    if (!eliminated.is_open()) {
        cout << "被消元行文件打开失败" << endl;
        return 1;
    }
    vector<string> elted;
    string temp2;
    while (getline(eliminated, temp2))
        elted.push_back(temp2);
    eliminated.close();
    for (int i = 0; i < elted.size(); i++)
        string_to_num(elted[i], i, lined, Ed);

    special_Gauss_SSE(E, Ed, row, rowd, line); // 调用特殊高斯消元SSE方法
    result.open("C:\\Users\\lenovo\\Desktop\\Gause4\\消元结果.txt", ios::out);
    for (int i = 0; i < row; i++)
    {
        for (int j = 0; j < line; j++)
        {
            result << E[i][j];
        }
        result << endl;
    }
    // 释放内存
    for (int i = 0; i < row; i++)
        delete[] E[i];
    delete[] E;
    for (int i = 0; i < rowd; i++)
        delete[] Ed[i];
    delete[]Ed;
    return 0;
}*/